﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlDoc = HtmlAgilityPack.HtmlDocument;
using WebFunctions;
using Newtonsoft.Json;
using System.IO;
using System.Windows.Forms;

namespace AutoIndustryFiles
{
    /// <summary>
    /// Scraper for the Shanghai FANUC product catalogue.
    /// Reads previously saved catalogue listing pages (*.php) from a local directory,
    /// opens every product page they link to, and records the download links found in
    /// the product page's "样本" and "视频" table sections into <c>FileLinks</c>.
    /// </summary>
    class sh_fanucScraper : FileLinksScraper2
    {
        /// <summary>Site root used to resolve hrefs found in the pages.</summary>
        private const string FanucSiteRoot = "http://www.shanghai-fanuc.com.cn";

        /// <summary>Catalogue entry page; used for both MainPage and CompanyUrl.</summary>
        private const string FanucCatalogUrl = "http://www.shanghai-fanuc.com.cn/index.php?option=com_djcatalog2&view=items&cid=1&Itemid=63&lang=zh";

        /// <summary>Local directory holding the saved catalogue listing pages.</summary>
        private const string SavedListingDirectory = @"D:\外包项目\自动化资料下载\Output\shanghai_fanuc\";

        /// <summary>Header text marking the first kind of section whose links are collected.</summary>
        private const string SampleLabel = "样本";

        /// <summary>Header text marking the second kind of section whose links are collected.</summary>
        private const string VideoLabel = "视频";

        /// <summary>bgcolor value that identifies a section header cell.</summary>
        private const string HeaderBgColor = "#cccccc";

        /// <summary>CSS class that identifies a section header cell.</summary>
        private const string HeaderCssClass = "robotdatatitle";

        /// <summary>Selects every table row that contains a section header cell.</summary>
        private const string HeaderRowXPath = "//tr[td[@bgcolor=\"" + HeaderBgColor + "\" or @class=\"" + HeaderCssClass + "\"]]";

        /// <summary>
        /// Initialises the company metadata for this scraper.
        /// </summary>
        public sh_fanucScraper()
        {
            this.MainPage = FanucCatalogUrl;
            this.CompanyName = "上海FANUC";
            this.CompanyUrl = FanucCatalogUrl;
            this.ColumnName = "首页 > 产品";
        }

        /// <summary>
        /// Walks every saved listing page and appends one <c>FileLink</c> per download
        /// anchor found on the linked product pages.
        /// </summary>
        /// <param name="param">Not used by this implementation.</param>
        /// <remarks>
        /// XPaths used on a listing page:
        ///   //div//li[@class="active parent level0"]/a/span  -> first-level category (InnerText)
        ///   //div//li[@class="active level1"]/a/span         -> second-level category (InnerText)
        ///   //td[@class="djcat_td_title"]/a                  -> product link (href) and name (InnerText)
        /// NOTE(review): the listing directory is a hard-coded local path; Directory.EnumerateFiles
        /// throws if it does not exist.
        /// </remarks>
        public override void GenerateAllLinks(string param)
        {
            // Materialise once: the original deferred query walked the directory tree twice
            // (once for Count(), once for the foreach).
            List<string> listingFiles = Directory
                .EnumerateFiles(SavedListingDirectory, "*.php", SearchOption.AllDirectories)
                .OrderBy(path => path)
                .ToList();

            foreach (string listingFile in listingFiles)
            {
                HtmlDoc listing = new HtmlDoc();
                listing.LoadHtml(File.ReadAllText(listingFile, Encoding.UTF8));

                HtmlAgilityPack.HtmlNodeCollection productAnchors =
                    listing.DocumentNode.SelectNodes(@"//td[@class=""djcat_td_title""]/a");
                if (productAnchors == null)
                {
                    // Page lists no products.
                    continue;
                }

                // Either category level may be missing from a page; fall back to an empty
                // string instead of dereferencing a null node.
                string level0 = InnerTextOrEmpty(
                    listing.DocumentNode.SelectSingleNode(@"//div//li[@class=""active parent level0""]/a/span"));
                string level1 = InnerTextOrEmpty(
                    listing.DocumentNode.SelectSingleNode(@"//div//li[@class=""active level1""]/a/span"));

                foreach (HtmlAgilityPack.HtmlNode productAnchor in productAnchors)
                {
                    ScrapeProductPage(productAnchor, level0, level1);
                }
            }
        }

        /// <summary>
        /// Downloads one product page and collects the links of its sample/video sections.
        /// </summary>
        /// <param name="productAnchor">Anchor on the listing page pointing at the product.</param>
        /// <param name="level0">First-level category text (may be empty).</param>
        /// <param name="level1">Second-level category text (may be empty).</param>
        private void ScrapeProductPage(HtmlAgilityPack.HtmlNode productAnchor, string level0, string level1)
        {
            string productName = productAnchor.InnerText.Trim();
            string productUrl = FormatUrl(productAnchor.GetAttributeValue("href", ""), FanucSiteRoot);

            bool updateFlag = false;
            UpdateMsg("sh_fanucScraper", productUrl, ref updateFlag);

            string productHtml = WebFuncs.OpenUrlEx(productUrl, "UTF-8");
            if (string.IsNullOrEmpty(productHtml))
            {
                // NOTE(review): OpenUrlEx is defined elsewhere; guard in case it yields
                // null/empty on a failed download (LoadHtml rejects null).
                return;
            }

            HtmlDoc productDoc = new HtmlDoc();
            productDoc.LoadHtml(productHtml);

            HtmlAgilityPack.HtmlNodeCollection headerRows = productDoc.DocumentNode.SelectNodes(HeaderRowXPath);
            if (headerRows == null)
            {
                return;
            }

            string categoryPath = level0 + "->" + level1 + "->" + productName;

            foreach (HtmlAgilityPack.HtmlNode headerRow in headerRows)
            {
                string headerText = headerRow.InnerText;

                // A header mentioning both labels is treated as a sample section,
                // matching the precedence of the original conditional expressions.
                bool isSample = headerText.Contains(SampleLabel);
                if (!isSample && !headerText.Contains(VideoLabel))
                {
                    continue;
                }

                CollectSectionLinks(headerRow, isSample, productName, categoryPath);
            }
        }

        /// <summary>
        /// Adds a <c>FileLink</c> for every anchor in the rows that follow
        /// <paramref name="headerRow"/>, stopping at the next section header row or at the
        /// end of the sibling list.
        /// </summary>
        /// <param name="headerRow">The section header row the walk starts after.</param>
        /// <param name="isSample">True for a "样本" section, false for a "视频" section.</param>
        /// <param name="productName">Trimmed product name from the listing page.</param>
        /// <param name="categoryPath">"level0->level1->productName" path stored with each link.</param>
        private void CollectSectionLinks(
            HtmlAgilityPack.HtmlNode headerRow,
            bool isSample,
            string productName,
            string categoryPath)
        {
            // The header test is re-evaluated for every row, so a header that directly
            // follows a link row always terminates the section.
            for (HtmlAgilityPack.HtmlNode row = NextDataRow(headerRow);
                 row != null && !IsSectionHeaderRow(row);
                 row = NextDataRow(row))
            {
                HtmlAgilityPack.HtmlNodeCollection anchors = row.SelectNodes(".//a");
                if (anchors == null)
                {
                    continue;
                }

                // Sample links are named after the product; video links take the name from
                // the row's first span, falling back to the product name if there is none.
                string linkName = productName;
                if (!isSample)
                {
                    HtmlAgilityPack.HtmlNode caption = row.SelectSingleNode("td/span[1]");
                    if (caption != null)
                    {
                        linkName = caption.InnerText.Trim();
                    }
                }

                foreach (HtmlAgilityPack.HtmlNode anchor in anchors)
                {
                    FileLinks.Add(new FileLink(
                        FileLinks.Count().ToString(),
                        FormatUrl(anchor.GetAttributeValue("href", ""), FanucSiteRoot),
                        linkName,
                        categoryPath,
                        true,
                        isSample ? SampleLabel : VideoLabel,
                        "",
                        isSample ? anchor.InnerText.Trim() : ""));
                }
            }
        }

        /// <summary>
        /// Returns the next sibling of <paramref name="node"/> that is a &lt;tr&gt; element
        /// with at least one &lt;td&gt; child, or null when no such sibling exists.
        /// </summary>
        private static HtmlAgilityPack.HtmlNode NextDataRow(HtmlAgilityPack.HtmlNode node)
        {
            for (HtmlAgilityPack.HtmlNode sibling = node.NextSibling; sibling != null; sibling = sibling.NextSibling)
            {
                // Siblings include text nodes between rows; skip anything that is not a
                // row with a cell.
                if (string.Equals(sibling.Name, "tr", StringComparison.OrdinalIgnoreCase)
                    && sibling.SelectSingleNode("td[1]") != null)
                {
                    return sibling;
                }
            }

            return null;
        }

        /// <summary>
        /// True when the first cell of <paramref name="row"/> carries the header
        /// background colour or the header CSS class.
        /// </summary>
        private static bool IsSectionHeaderRow(HtmlAgilityPack.HtmlNode row)
        {
            HtmlAgilityPack.HtmlNode firstCell = row.SelectSingleNode("td[1]");
            if (firstCell == null)
            {
                return false;
            }

            return firstCell.GetAttributeValue("bgcolor", "") == HeaderBgColor
                || firstCell.GetAttributeValue("class", "") == HeaderCssClass;
        }

        /// <summary>
        /// Returns the node's InnerText, or an empty string when the node is null.
        /// </summary>
        private static string InnerTextOrEmpty(HtmlAgilityPack.HtmlNode node)
        {
            return node == null ? "" : node.InnerText;
        }
    }
}
